/******************************************************************************
Input(s): Manufacturing_1977_2001.dta

This .do file has 4 sections, each switched on or off by its local flag (S1-S4) below.
******************************************************************************/

/*Start from an empty session; -clear all- also drops any loaded programs*/
clear all
set matsize 11000
set more off

/*Root folders used throughout the file: fill these in before running*/
global boots "/FILE PATH GOES HERE/"
global data "/FILE PATH GOES HERE/"

/*Personal ado directory*/
sysdir set PERSONAL "/FILE PATH GOES HERE/"

/*Run switches: a section below is executed only when its flag equals 1*/

/*SECTION 1: Industry-Level Misallocation Measures and Business Dynamism*/
local S1 0

/*SECTION 2: Re-estimate model on the bottom 95% of firms by size in each industry*/
local S2 0

/*SECTION 3: Re-measure misallocation with Atkeson-Burstein markups*/
local S3 0

/*SECTION 4: Output new misallocation measures with Atkeson-Burstein markups*/
local S4 0



/*********************************************************
SECTION 1: 
	- Industry-Level Misallocation Measures and Business Dynamism
**********************************************************/
if `S1' == 1 {
/*Baseline + Robustness: CMF Sample*/
use "$data/Misallocation_Industry_CM_vrts_1acf_sest_5y.dta", clear
		
/*Robustness: FK4 (measures from the "_4" file, matched on the 4-digit code)*/
gen naics4 = substr(fk,1,4)
	merge m:1 naics4 year using "$data/Misallocation_Industry_CM_vrts_1acf_sest_5y_4.dta"
		drop _m

/*Keeping only industry measures (this drops naics4, which is rebuilt below)*/
keep year fk_naics02 M_i_* alpha* sigma* M_*

/*Merge VA shares*/
merge 1:1 fk_naics02 year using "$data/VA_Shares_fk.dta"		
	drop _m
	
/*Averaging NAICS6 to NAICS4 misallocation:
  VA-weighted geometric mean within year x naics4, with weights computed
  over the industries for which the measure is non-missing*/
gen naics4 = substr(fk,1,4)
foreach var of varlist M_i*5y {
	gen temp = va_fk if `var'!=.
	bys year naics4: egen temp_total = sum(temp)
	gen share = temp/temp_total
		drop temp temp_total
		
	gen temp = share*log(`var')
	by year naics4: egen temp_total = sum(temp)
	
	/*Strip the leading "M_i" so the new variable is named ave_M_i<rest>*/
	local name = substr("`var'",4,.)
	
	gen ave_M_i`name' = exp(temp_total)
		drop temp temp_total share
	}

/*Net Counterfactuals*/
foreach var of varlist ave_* M_i*4 {
	replace `var' = `var'-1
	}

/*Merging in Business Dynamics Statistics*/
destring naics4, gen(vcnaics4)
merge m:1 vcnaics4 year using "/FILE PATH GOES HERE/bds2018_vcnaics4.dta"

	/*Drop BDS rows that have no misallocation counterpart*/
	drop if _m==2
	drop _m
	
	destring firms-firmdeath_emp, force replace
	
/*One row per naics4 x year*/
collapse (mean) ave_* M_i*4 firms-firmdeath_emp (sum)va_fk, by(naics4 year)

/*Back to gross (1 + net) measures*/
gen miss = 1+M_i_ASM_CM_vrts_1acf_sest_5y_4
gen ave_miss = 1+ave_M_i_ASM_CM_vrts_1acf_sest_5y

egen group_naics4 = group(naics4)
tsset group_naics4 year

/*Five-year windows*/
gen collapse_year = 1 if year<=1982
	replace collapse_year = 2 if year>=1983 & year<=1987
	replace collapse_year = 3 if year>=1988 & year<=1992
	replace collapse_year = 4 if year>=1993 & year<=1997
	replace collapse_year = 5 if year>=1998 & year<=2002
	replace collapse_year = 6 if year>=2003 & year<=2007
	
collapse (mean) miss ave_miss reallocation_rate estabs_entry_rate estabs_exit_rate va, by(group_naics4 collapse_year)

/*Rename first, then declare the panel, so the time-series setting refers to
  the variable name that actually exists afterwards*/
rename collapse_year year
tsset group_naics4 year

/*Log misallocation.
  NOTE(review): the regressions below use d1_lmiss and d1_lave_miss, which the
  original code never created (only d1_miss and d1_ave_miss existed). Logs of
  the gross measures are assumed to be what was intended -- confirm.*/
gen lmiss = log(miss)
gen lave_miss = log(ave_miss)

/*Change relative to the previous five-year window*/
foreach var of varlist ave_miss miss lave_miss lmiss estabs_entry_rate estabs_exit_rate reallocation_rate {
	
	gen d1_`var' = `var'-l1.`var'
	
	}

putexcel set "/FILE PATH GOES HERE/Misallocation_revision_202011.xls", sheet("Business Dynamics") modify
putexcel A1 = "Five-Year Changes in Misallocation and in Business Dynamics"

putexcel B2 = "Job Reallocation Rate"

putexcel D2 = "Establishment Entry Rate"

putexcel F2 = "Establishment Exit Rate"

putexcel A3 = "Industry Misallocation: NAICS4"
putexcel A6 = "Industry Misallocation: NAICS6 Aggregated to NAICS4"

putexcel A10 = "Number of Observations"
putexcel A11 = "R Squared"

/*Six regressions: each outcome on each misallocation measure, with year and
  industry fixed effects absorbed. Columns B..G are filled left to right; the
  NAICS4 measure goes in row 3 and the aggregated NAICS6 measure in row 6.*/
local cols "B C D E F G"
local k = 0
foreach dep in reallocation_rate estabs_entry_rate estabs_exit_rate {
	foreach rhs in lmiss lave_miss {
		local ++k
		local col : word `k' of `cols'
		if "`rhs'" == "lmiss" {
			local row = 3
		}
		else {
			local row = 6
		}

		reghdfe d1_`dep' d1_`rhs', ab(year group_naics4) nocons vce(robust)

		/*Round r(table) to 4 significant digits via a round trip through
		  variables; rows whose rounded value is missing are dropped by nomiss*/
		matrix REG = r(table)
		svmat REG, n(reg)
		foreach var of varlist reg* {
			replace `var' = sign(`var')*round(abs(`var'),10^(floor(log10(abs(`var')))-3))
			tostring `var', replace force usedisplayformat
			destring `var', force replace
		}
		mkmat reg*, mat(REG) nomiss
		drop reg*

		/*First three rows of r(table): coefficient, std. error, t statistic*/
		matrix OUT1 = REG[1..3,1]

		/*Same treatment for N and R-squared.
		  NOTE(review): N is also rounded to 4 significant digits here, which
		  would alter it if it ever exceeded 9999.*/
		matrix SUM = [e(N) \ e(r2)]
		svmat SUM, n(reg)
		foreach var of varlist reg* {
			replace `var' = sign(`var')*round(abs(`var'),10^(floor(log10(abs(`var')))-3))
			tostring `var', replace force usedisplayformat
			destring `var', force replace
		}
		mkmat reg*, mat(SUM) nomiss
		drop reg*

		putexcel `col'`row' = matrix(OUT1)
		putexcel `col'10 = matrix(SUM)
	}
}

}

/*********************************************************
SECTION 2: 
	- Re-estimate AB model using only bottom 95% of firms
**********************************************************/
if `S2' == 1 {
global nrep 10
/*Create dataset to hold summary of bootstrapping*/
!gunzip /FILE PATH GOES HERE/estimation_dataset.dta.gz
use "$boots/estimation_pairs.dta", clear
	/*Empty placeholders, filled in one estimation pair at a time below*/
	local vnames "bl bk by sigma aL aK"
	foreach name of local vnames {
		gen `name' = .
		}
save "$boots/estimation_pairs_LP_ky_AB2.dta", replace
quietly sum estimation_pair
	local pmax = r(max)
	
/*Loop over every estimation pair except 751, which the original list
  (1/750 752/pmax) left out. -forvalues- accepts a single range only, so the
  skip is written explicitly; this also behaves correctly if pmax < 752.*/
forvalues num = 1/`pmax' {
	if `num' == 751 {
		continue
	}

	/*Open the estimation dataset*/
	use "$boots/estimation_dataset.dta", clear

	/*Keep only the data needed for this round of estimation*/
	keep if estimation_pair==`num'
	
	/*Keep the bottom 95% of firms by size*/
	egen tl = tag(lbdnum)
	bys lbd: egen va_mean = mean(PY2)
	
	/*Total of firm-average value added, counting each firm once*/
	gen temp = va_mean if tl==1
	egen va_sum= sum(temp)
	/*NOTE(review): the share uses the observation-level PY2 rather than the
	  firm average va_mean, so the cut below is applied observation by
	  observation -- confirm this is intended.*/
	gen va_share = PY2/va_sum
	quietly sum va_share if tl==1, d
	local p95 = r(p95)
	keep if va_share<=`p95'
		drop va_sum va_share
	
	save "$boots/estimation_temp_AB2.dta", replace
	
	/*Row holding (iteration id, bk, by); 99 marks cells not yet filled*/
	matrix A = J(1,3,99)
	
		quietly tsset lbd year
		quietly sum L_elasticity_BLS_5y
		global aLprog = r(mean)
		/*The called do-file is expected to leave its estimates in matrix vcoef*/
		do "$boots/0_gmm_ky_AB.do"
		
		matrix A[1,1] = 1
		matrix A[1,2] = vcoef[1,1]
		matrix A[1,3] = vcoef[1,2]
		
	/*Turn the coefficient row into a one-observation dataset*/
	matrix coln A = iter_num bk by
	svmat A, name(col)
	
	keep iter_num bk by
	keep if iter_num!=.
	keep bk by
	gen bl = $aLprog
	gen estimation_pair = `num'
	
	/*Fold this pair's estimates into the running results file; for the
	  matched pair the master values of bl, bk and by are the ones kept*/
	merge 1:1 estimation_pair using "$boots/estimation_pairs_LP_ky_AB2.dta"
		drop _m
		order estimation_pair fk period bl bk by
		sort estimation_pair
		replace sigma = 1/by
		replace aL = bl*sigma/(sigma-1)
		replace aK = bk*sigma/(sigma-1)
	
	save "$boots/estimation_pairs_LP_ky_AB2.dta", replace
	
	disp("DONE WITH ITERATION `num'")
	}

erase "$boots/estimation_temp_AB2.dta"
}


/*********************************************************
SECTION 3: 
	- Re-measure AB misallocation
**********************************************************/
if `S3' == 1 {

	local code "/FILE PATH GOES HERE/"
	local trim "own"
	local type "5y"
	local num = 1

	/*AB1 replicates the old AB measures, AB2 creates the new ones. For each,
	  run first the estimated-parameter (VRTS) specification and then the
	  CRTS ACF specification with sigma = 3. The globals are the inputs read
	  by the called do-file.*/
	foreach ab in AB1 AB2 {
		foreach spec in vrts_sest crts_s3 {

			if "`spec'" == "vrts_sest" {
				local rts "vrts"
				global sigma "sigma_`num'acf_vrts_`type'"
			}
			else {
				local rts "crts"
				global sigma "sigma_3"
			}

			global alphaL "aL_`num'acf_`rts'_`type'"
			global alphaK "aK_`num'acf_`rts'_`type'"
			global fname "`spec'_`ab'"
			global pftype "`type'"
			global dtrim "`trim'"

			do "`code'2_c_Misallocation_CF_VMarkups_`ab'.do"
		}
	}

	}

	

/*********************************************************
SECTION 4: 
	- Output new AB misallocation measures
**********************************************************/
if `S4' == 1 {
	
/*Combine the four industry-level AB misallocation files*/
use "$data/Misallocation_Industry_VM_vrts_sest_AB1.dta", clear
	merge 1:1 fk_naics02 year using "$data/Misallocation_Industry_VM_crts_s3_AB1.dta"
		drop _m
	merge 1:1 fk_naics02 year using "$data/Misallocation_Industry_VM_vrts_sest_AB2.dta"
		drop _m
	merge 1:1 fk_naics02 year using "$data/Misallocation_Industry_VM_crts_s3_AB2.dta"
		drop _m

merge 1:1 fk_naics02 year using "$data/VA_Shares_fk.dta"		
	drop _m
	
/*VA-weighted geometric mean of each industry measure within year, with
  weights computed over the industries where the measure is non-missing*/
foreach var of varlist M_i*AB* {
	gen temp = va_fk if `var'!=.
	bys year: egen temp_total = sum(temp)
	gen share = temp/temp_total
		drop temp temp_total
		
	gen temp = share*log(`var')
	by year: egen temp_total = sum(temp)
	
	/*Strip the leading "M_i" so the new variable is named tM<rest>*/
	local name = substr("`var'",4,.)
	
	gen tM`name' = exp(temp_total)
		drop temp temp_total share
	}

/*Net Counterfactuals*/
foreach var of varlist tM* M* {
	replace `var' = `var'-1
	}

/*Output Aggregate Misallocation Time Series*/

/*One row per year*/
egen ty = tag(year)
keep if ty==1
keep year M_ASM* M_CMF* tM_ASM* tM_CMF*

/*Truncate (not round) to four decimals by cutting the string representation
  four characters after the decimal point*/
foreach var of varlist M* tM* {
	tostring `var', replace force
	gen temp = strpos(`var',".")
		replace temp = . if temp==0
	replace temp = temp+4
	replace `var' = substr(`var',1,temp)
	destring `var', replace
	drop temp
	}
keep if inlist(year,1982,1987,1992,1997,2002,2007)

/*Rows of the exported matrix carry no year label, so fix their order*/
sort year

/*-list- instead of -browse-, so the file also runs without the GUI*/
list year tM_ASM_VM_vrts_sest_AB1 tM_ASM_VM_crts_s3_AB1 tM_ASM_VM_vrts_sest_AB2 tM_ASM_VM_crts_s3_AB2, noobs

mkmat tM_ASM_VM_vrts_sest_AB2 tM_ASM_VM_crts_s3_AB2, mat(miss)

putexcel set "/FILE PATH GOES HERE/Misallocation_revision_202011.xls", sheet("Aggregates") modify
	
	putexcel E2 = "Variable RTS, Heterogeneous Markups"
	putexcel F2 = "Constant RTS, Heterogeneous Markups"
	putexcel E4 = matrix(miss)
	
}
	
